{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example 4: Reading and Writing Benchmark\n",
    "\n",
    "## Hardware Specification for Rerun\n",
    "\n",
    "Desktop workstation with 2x AMD EPYC 7702 64-core processors (128 physical and 256 logical cores in total) and 1024 GB of DDR4 memory, running the Ubuntu 22.04 LTS operating system."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set-up for the timing measurements carried out in the cells below\n",
    "import molli as ml\n",
    "import timeit\n",
    "\n",
    "# NOTE(review): N is not referenced by the benchmark cells, which hard-code 5 repeats\n",
    "N = 3\n",
    "\n",
    "# ml.aux.assert_molli_version_min(\"1.0.0b2\")\n",
    "\n",
    "_clib = ml.ConformerLibrary(\"bpa_aligned.clib\")\n",
    "\n",
    "# Pull every ensemble into memory up front so that the writing benchmark\n",
    "# measures writing only, not reading from the source library\n",
    "with _clib.reading():\n",
    "    ensembles = dict(_clib.items())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These are the various collection formats available in the `molli` storage back end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def _mol2_col(path, backend, overwrite=False):\n",
    "    \"\"\"Open a collection at `path` whose values are conformer ensembles stored as MOL2 text.\n",
    "\n",
    "    Shared by `_dir_col` and `_zip_col`, which differ only in the backend class.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    path\n",
    "        Location of the collection, passed through to `ml.storage.Collection`.\n",
    "    backend\n",
    "        Backend class from `ml.storage` to use for the collection.\n",
    "    overwrite\n",
    "        Forwarded unchanged to `ml.storage.Collection`.\n",
    "    \"\"\"\n",
    "    return ml.storage.Collection(\n",
    "        path,\n",
    "        backend=backend,\n",
    "        # The backend handles bytes; ensembles are converted to/from MOL2 text here\n",
    "        value_decoder=lambda x: ml.ConformerEnsemble.loads_mol2(x.decode()),\n",
    "        value_encoder=lambda x: ml.ConformerEnsemble.dumps_mol2(x).encode(),\n",
    "        ext=\".mol2\",\n",
    "        readonly=False,\n",
    "        overwrite=overwrite,\n",
    "    )\n",
    "\n",
    "\n",
    "def _dir_col(path, overwrite=False):\n",
    "    \"\"\"MOL2 collection at `path` using `ml.storage.DirCollectionBackend`.\"\"\"\n",
    "    return _mol2_col(path, ml.storage.DirCollectionBackend, overwrite=overwrite)\n",
    "\n",
    "\n",
    "def _zip_col(path, overwrite=False):\n",
    "    \"\"\"MOL2 collection at `path` using `ml.storage.ZipCollectionBackend`.\"\"\"\n",
    "    return _mol2_col(path, ml.storage.ZipCollectionBackend, overwrite=overwrite)\n",
    "\n",
    "\n",
    "def _ukv_col(path, overwrite=False):\n",
    "    \"\"\"`ml.ConformerLibrary` at `path`, opened with `readonly=False`.\"\"\"\n",
    "    return ml.ConformerLibrary(path, readonly=False, overwrite=overwrite)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Writing Times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing times bpa_test.clib 0.3480684249952901 [0.3782551420008531, 0.34833620399876963, 0.35106149199418724, 0.3510784639947815, 0.3480684249952901]\n",
      "Writing times bpa_test.zip 93.47247516000061 [93.87680340299266, 94.93543296400458, 93.47247516000061, 93.61168274100055, 94.20502133599075]\n",
      "Writing times bpa_test 93.41699376401084 [94.56352856599551, 93.70587370399153, 93.74676171099418, 93.41699376401084, 96.13699470600113]\n"
     ]
    }
   ],
   "source": [
    "# Note: bpa_test_deflate5.zip is absent from this loop because the compressed format cannot be written into\n",
    "for prep, path in [\n",
    "    (_ukv_col, \"bpa_test.clib\"),\n",
    "    (_zip_col, \"bpa_test.zip\"),\n",
    "    (_dir_col, \"bpa_test\"),\n",
    "]:\n",
    "    # `prep`, `path` and `ensembles` reach the timed code through globals().\n",
    "    # The setup statement re-creates the library (overwrite=True) before each of\n",
    "    # the 5 runs, so only the write loop itself is timed.\n",
    "    clib_write_times = timeit.repeat(\n",
    "        stmt=\"with library.writing():\\n    for k, v in ensembles.items(): library[k]=v\",\n",
    "        setup=\"library = prep(path, overwrite=True)\",\n",
    "        repeat=5,\n",
    "        number=1,\n",
    "        globals=globals(),\n",
    "    )\n",
    "\n",
    "    # Report the best (minimum) time first, followed by every individual run\n",
    "    print(f\"Writing times {path} {min(clib_write_times)} {clib_write_times}\", flush=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reading Times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Read times bpa_test.clib 1.0547445749980398 [1.0947346960019786, 1.0660112299956381, 1.0552394859987544, 1.0547445749980398, 1.0610972559952643]\n",
      "Read times bpa_test.zip 154.67337175300054 [154.67337175300054, 166.21870341600152, 157.39443366299383, 160.86328307799704, 163.08083603800333]\n",
      "Read times bpa_test_deflate5.zip 160.31052669300698 [160.31052669300698, 168.26776486101153, 162.00970906899602, 172.00367274400196, 174.04174041900842]\n",
      "Read times bpa_test 162.5466837839922 [162.5466837839922, 171.23339160499745, 165.0368733159994, 165.7297141720046, 167.20620242299628]\n"
     ]
    }
   ],
   "source": [
    "# Note: bpa_test_deflate5.zip is a compressed archive made from the \"bpa_test\" directory\n",
    "# that the writing benchmark produces.\n",
    "# NOTE(review): no cell in this notebook creates it, so it presumably has to be\n",
    "# prepared by hand before this cell is run - confirm.\n",
    "for prep, path in [\n",
    "    (_ukv_col, \"bpa_test.clib\"),\n",
    "    (_zip_col, \"bpa_test.zip\"),\n",
    "    (_zip_col, \"bpa_test_deflate5.zip\"),\n",
    "    (_dir_col, \"bpa_test\"),\n",
    "]:\n",
    "    # `prep` and `path` reach the timed code through globals(); the setup statement\n",
    "    # re-opens the existing library (overwrite=False) before each of the 5 runs\n",
    "    clib_read_times = timeit.repeat(\n",
    "        stmt=\"with library.reading():\\n    for k, v in library.items(): pass\",\n",
    "        setup=\"library = prep(path, overwrite=False)\",\n",
    "        repeat=5,\n",
    "        number=1,\n",
    "        globals=globals(),\n",
    "    )\n",
    "\n",
    "    # Report the best (minimum) time first, followed by every individual run\n",
    "    print(f\"Read times {path} {min(clib_read_times)} {clib_read_times}\", flush=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev-blake",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
